# 读取小文件的API
#coding:utf8
from pyspark import SparkContext,SparkConf

if __name__ == '__main__':
    # Demo of SparkContext.wholeTextFiles, the API intended for reading a
    # directory that contains many small files.
    # "local[*]" runs Spark in-process on the local machine.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    try:
        # Read the whole directory. The path is relative, so it is resolved
        # against the working directory the script is launched from.
        rdd = sc.wholeTextFiles("../data/input/tiny_files")
        # The result is an RDD of 2-tuples: (file path, file content).
        # Keep only the content of each file and bring it to the driver.
        print(rdd.values().collect())
    finally:
        # Always release the SparkContext, even when reading or collecting
        # fails (e.g. the input directory does not exist).
        sc.stop()

